Loading libraries

library(readxl)                 # load data in xlsx format
library(dplyr)                  # work with dataframes
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)                # create plots
library(sf)                     # encode spatial vector data
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
library(rnaturalearthdata)      # world vector map data
library(leaflet)                # create interactive maps
library(extrafont)              # fonts
## Registering fonts with R

Importing data

# Read the R&D expenditure workbook that every later dataframe is built from.
GDE_on_R_D <- read_excel(path = "data/GDE_on_R_D.xlsx")

Tidying database and creating dataframes

Dataframe 1: Percentage financed by Government and Industry

# Keep only the rows whose funding source is Industry or Government, then
# narrow the table to the four columns used by the financing bar plot.
financed <-
  GDE_on_R_D %>%
  filter(financed_by %in% c("Industry", "Government")) %>%
  select(country, region, percentage_financed_by, financed_by)

# Preview the first rows of the result.
head(financed)
## # A tibble: 6 x 4
##   country region  percentage_financed_by financed_by
##   <chr>   <chr>                    <dbl> <chr>      
## 1 Austria Europe                    49.7 Industry   
## 2 Austria Europe                    32.6 Government 
## 3 Belgium Europe                    58.6 Industry   
## 4 Belgium Europe                    22.5 Government 
## 5 Canada  America                   41.6 Industry   
## 6 Canada  America                   32.2 Government

Dataframe 2: Percentage performed by Government, Higher education and Industry

# Keep only the rows whose performing sector is Industry, Higher education
# or Government, then narrow the table to the four columns used by the
# performance bar plot.
performed <-
  GDE_on_R_D %>%
  filter(
    performed_by %in% c("Industry", "Higher education", "Government")
  ) %>%
  select(country, region, percentage_performed_by, performed_by)

# Preview the first rows of the result.
head(performed)
## # A tibble: 6 x 4
##   country   region  percentage_performed_by performed_by    
##   <chr>     <chr>                     <dbl> <chr>           
## 1 Australia Oceania                    53.4 Industry        
## 2 Australia Oceania                    30.6 Higher education
## 3 Australia Oceania                    12.7 Government      
## 4 Austria   Europe                     71.4 Industry        
## 5 Austria   Europe                     23.5 Higher education
## 6 Austria   Europe                      4.6 Government

Dataframe 3: Total researchers in full time

# Researcher totals per country, restricted to the 2015 rows.
# `year` is a numeric column, so it is compared against the number 2015;
# comparing against the string "2015" only matched because R silently
# coerced the numeric column to character first.
researchers <- GDE_on_R_D %>%
  select(country, total_researchers_in_full_time, year) %>%
  filter(year == 2015)

# Preview the first rows of the result.
head(researchers)
## # A tibble: 6 x 3
##   country    total_researchers_in_full_time  year
##   <chr>                               <dbl> <dbl>
## 1 Austria                             43562  2015
## 2 Belgium                             53178  2015
## 3 Chile                                8175  2015
## 4 Czech Rep.                          38081  2015
## 5 Denmark                             42425  2015
## 6 Estonia                              4187  2015

Creating Bar Plots

Visualizing the percentage financed by Government and Industry

# Horizontal bar chart of the percentage of R&D expenditure financed by each
# sector, drawn as one panel per sector.
ggplot(financed,
       aes(x = country, y = percentage_financed_by, fill = financed_by)) +
  # geom_col() plots the values as given (same as geom_bar(stat = "identity")).
  geom_col(width = .8) +
  coord_flip() +
  facet_grid(. ~ financed_by) +
  labs(x = "", y = "", fill = "Financed by",
       title = "GROSS DOMESTIC EXPENDITURE ON R&D (OECD MEMBER COUNTRIES)",
       subtitle = "Percentage financed by Government and the Business enterprise sector (2015)",
       caption = "Source: OECD, Main Science and Technology Indicators, 2018. | DataViz: Lorna Campos Matus") +
  theme_minimal() +
  # Write each percentage next to its bar.
  geom_text(aes(label = percentage_financed_by), color = "black",
            family = "Arial Rounded MT Bold", size = 3,
            vjust = .38, hjust = -.3) +
  # The legend identifies the panels, so the facet strips are hidden; the
  # value axis is hidden because every bar carries its own label.
  theme(strip.background = element_blank(),
        strip.text.x = element_blank(),
        axis.text.x = element_blank(),
        axis.text = element_text(size = 10, color = "black"),
        title = element_text(color = "black", size = 10,
                             family = "A Love of Thunder")) +
  # Colours are bound to sectors by name so the mapping no longer depends on
  # the alphabetical order of the levels. The former scale_color_manual()
  # call was dropped: no colour aesthetic is mapped, and its names/values
  # were inverted (colours as names, sector labels as values).
  scale_fill_manual(values = c("Government" = "#c7093f",
                               "Industry" = "#008b8e"))
# Embed the image file of this plot in the rendered document.
knitr::include_graphics("data/Barplot - Financed.png")
No data recorded for Australia

No data recorded for Australia

Visualizing the percentage performed by Government, Higher education and Industry

# Horizontal bar chart of the percentage of R&D expenditure performed by each
# sector, drawn as one panel per sector.
ggplot(performed,
       aes(x = country, y = percentage_performed_by, fill = performed_by)) +
  # geom_col() plots the values as given (same as geom_bar(stat = "identity")).
  geom_col() +
  coord_flip() +
  facet_grid(. ~ performed_by) +
  labs(x = "", y = "", fill = "Performed by",
       title = "GROSS DOMESTIC EXPENDITURE ON R&D (OECD MEMBER COUNTRIES)",
       subtitle = "Percentage performed by Government, Higher education and the Business enterprise sector (2015)",
       caption = "Source: OECD, Main Science and Technology Indicators, 2018. | DataViz: Lorna Campos Matus") +
  # theme_hc() lives in the ggthemes package, which this document never
  # attaches; the namespace-qualified call avoids the
  # "could not find function" error (ggthemes must be installed).
  ggthemes::theme_hc() +
  # Write each percentage next to its bar.
  geom_text(aes(label = percentage_performed_by), color = "black",
            family = "Arial Rounded MT Bold", size = 3,
            vjust = .38, hjust = -.3) +
  # The legend identifies the panels, so the facet strips are hidden; the
  # value axis is hidden because every bar carries its own label.
  theme(strip.background = element_blank(),
        strip.text.x = element_blank(),
        axis.text.x = element_blank(),
        axis.text = element_text(size = 10, color = "black"),
        title = element_text(color = "black", size = 10,
                             family = "A Love of Thunder")) +
  # Colours are bound to sectors by name so the mapping no longer depends on
  # the alphabetical order of the levels. The former scale_color_manual()
  # call was dropped: no colour aesthetic is mapped, and its names/values
  # were inverted (colours as names, sector labels as values).
  scale_fill_manual(values = c("Government" = "#c7093f",
                               "Higher education" = "#9bb401",
                               "Industry" = "#008b8e"))
# Embed the image file of this plot in the rendered document.
knitr::include_graphics("data/Barplot - Performed.png")

Designing an interactive choropleth map

Let’s start by getting global data and geometries from Natural Earth

# Convert the bundled world-countries dataset to an sf object and keep only
# the country name and its geometry.
Map.stat <-
  countries50 %>%
  st_as_sf() %>%
  select(name, geometry)

Joining and filtering data with geometries

# Attach the researcher totals to the country geometries (map `name` matched
# against data `country`), drop Antarctica and every country without a
# recorded total, and keep only the columns the map needs.
Map.stat1 <-
  Map.stat %>%
  left_join(researchers, by = c("name" = "country")) %>%
  filter(name != "Antarctica") %>%
  filter(!is.na(total_researchers_in_full_time)) %>%
  select(name, total_researchers_in_full_time, geometry)

# Preview the first features of the result.
head(Map.stat1)
## Simple feature collection with 6 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -109.4341 ymin: -55.8917 xmax: 18.83223 ymax: 55.05874
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
##          name total_researchers_in_full_time
## 1     Austria                          43562
## 2     Belgium                          53178
## 3 Switzerland                          43740
## 4       Chile                           8175
## 5  Czech Rep.                          38081
## 6     Germany                         387982
##                         geometry
## 1 MULTIPOLYGON (((16.95312 48...
## 2 MULTIPOLYGON (((4.816016 51...
## 3 MULTIPOLYGON (((9.524023 47...
## 4 MULTIPOLYGON (((-67.5752 -5...
## 5 MULTIPOLYGON (((14.80938 50...
## 6 MULTIPOLYGON (((14.19824 53...

Creating the sequential palette

# Colour function mapping a researcher total to a shade along the six-colour
# ramp below; the domain is the set of totals present on the map.
palette <- colorNumeric(
  palette = c(
    "#f8d6ea",
    "#dd86e3",
    "#ca22b9",
    "#d53091",
    "#d5047f",
    "#a50288"
  ),
  domain = Map.stat1$total_researchers_in_full_time
)

Setting up the pop up text

# One HTML label per country: the name in bold, a line break, then the
# researcher total (rounded to one decimal place) as text.
popup_sb <- lapply(
  paste0(
    "<b>", Map.stat1$name, "</b><br/>",
    as.character(round(Map.stat1$total_researchers_in_full_time, 1))
  ),
  htmltools::HTML
)

Now, let’s go to the map!

# Interactive choropleth: each country polygon is filled according to its
# researcher total, shows an HTML label on hover, and is outlined when
# highlighted; a legend for the colour scale sits in the bottom-right corner.
leaflet(Map.stat1) %>%
  addTiles() %>%
  setView(lng = 25.670689, lat = 35.286792, zoom = 2) %>%
  addPolygons(
    data = Map.stat1,
    fillColor = ~palette(total_researchers_in_full_time),
    fillOpacity = .8,
    weight = .8,
    smoothFactor = .2,
    # The parameter is spelled out in full; the former `highlight =` only
    # worked through partial argument matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "ghostwhite",
      fillOpacity = .9,
      bringToFront = TRUE
    ),
    label = popup_sb,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "13px",
      direction = "auto"
    )
  ) %>%
  addLegend(
    pal = palette,
    values = ~total_researchers_in_full_time,
    position = "bottomright",
    title = "Total researchers<br />in full time equivalent"
  )

No data recorded for Australia, Canada, Israel and Mexico

What does the data tell us?

  • Mexico registers the highest percentage of government financing, surpassing second-place Greece by 17.2 percentage points.
  • Japan and Korea register the highest percentages financed by the business enterprise sector, reaching 78 % and 74.5 %, respectively.
  • Mexico leads in the percentage of R&D performed by government (37.9 %); Latvia, by higher education (49.7 %); and Israel, by the business enterprise sector (85.1 %).
  • Indeed, the business enterprise sector is the one that ranks first in the percentage of R&D performed in OECD member countries.
  • And what about the map? Most full-time researchers are located in the U.S. (1,379,977), Japan (662,071), Germany (387,982), and Korea (356,447), while at the opposite end are Iceland (1,944), Luxembourg (2,539), Latvia (3,613), and Estonia (4,187).